Libraries
# Install renv only when it is missing. requireNamespace() checks availability
# without attaching the package; the library(renv) call below does the
# attaching.
if (!requireNamespace("renv", quietly = TRUE)) {
  install.packages("renv")
}
Loading required package: renv
Warning: package ‘renv’ was built under R version 4.3.3
Attaching package: ‘renv’
The following objects are masked from ‘package:stats’:
embed, update
The following objects are masked from ‘package:utils’:
history, upgrade
The following objects are masked from ‘package:base’:
autoload, load, remove
library(renv)
# Restore the project's package library; the packages and versions that get
# installed are listed in the output below.
# NOTE(review): the run shown here was answered with "y", so this presumably
# prompts for confirmation in an interactive session -- confirm.
renv::restore()
The following package(s) will be updated:
# CRAN -----------------------------------------------------------------------
- arrow [* -> 15.0.1]
- assertthat [* -> 0.2.1]
- cpp11 [* -> 0.4.7]
- dplyr [* -> 1.1.4]
- fansi [* -> 1.0.6]
- knitr [* -> 1.43]
- progress [* -> 1.2.3]
- readr [* -> 2.1.5]
- rmarkdown [* -> 2.24]
- tidyselect [* -> 1.2.1]
- tinytex [* -> 0.49]
- utf8 [* -> 1.2.4]
- vctrs [* -> 0.6.5]
- vroom [* -> 1.6.5]
- withr [* -> 3.0.0]
# RSPM -----------------------------------------------------------------------
- base64enc [* -> 0.1-3]
- bit [* -> 4.0.5]
- bit64 [* -> 4.0.5]
- bslib [* -> 0.5.1]
- cachem [* -> 1.0.8]
- cli [* -> 3.6.1]
- clipr [* -> 0.8.0]
- crayon [* -> 1.5.2]
- digest [* -> 0.6.33]
- ellipsis [* -> 0.3.2]
- evaluate [* -> 0.21]
- fastmap [* -> 1.1.1]
- fontawesome [* -> 0.5.2]
- fs [* -> 1.6.3]
- generics [* -> 0.1.3]
- glue [* -> 1.6.2]
- here [* -> 1.0.1]
- highr [* -> 0.10]
- hms [* -> 1.1.3]
- htmltools [* -> 0.5.6]
- jquerylib [* -> 0.1.4]
- jsonlite [* -> 1.8.7]
- lifecycle [* -> 1.0.3]
- magrittr [* -> 2.0.3]
- memoise [* -> 2.0.1]
- mime [* -> 0.12]
- pillar [* -> 1.9.0]
- pkgconfig [* -> 2.0.3]
- prettyunits [* -> 1.2.0]
- purrr [* -> 1.0.2]
- R6 [* -> 2.5.1]
- rappdirs [* -> 0.3.3]
- rlang [* -> 1.1.1]
- rprojroot [* -> 2.0.4]
- sass [* -> 0.4.7]
- stringi [* -> 1.7.12]
- stringr [* -> 1.5.0]
- tibble [* -> 3.2.1]
- tzdb [* -> 0.4.0]
- xfun [* -> 0.40]
- yaml [* -> 2.3.7]
y
# Installing packages --------------------------------------------------------
- Installing R6 ... OK [linked from cache]
- Installing assertthat ... OK [linked from cache]
- Installing bit ... OK [linked from cache]
- Installing bit64 ... OK [linked from cache]
- Installing glue ... OK [linked from cache]
- Installing cli ... OK [linked from cache]
- Installing rlang ... OK [linked from cache]
- Installing lifecycle ... OK [linked from cache]
- Installing magrittr ... OK [linked from cache]
- Installing vctrs ... OK [linked from cache]
- Installing purrr ... OK [linked from cache]
- Installing withr ... OK [linked from cache]
- Installing tidyselect ... OK [linked from cache]
- Installing cpp11 ... OK [linked from cache]
- Installing arrow ... OK [linked from cache]
- Installing base64enc ... OK [linked from cache]
- Installing fastmap ... OK [linked from cache]
- Installing cachem ... OK [linked from cache]
- Installing digest ... OK [linked from cache]
- Installing ellipsis ... OK [linked from cache]
- Installing htmltools ... OK [linked from cache]
- Installing jquerylib ... OK [linked from cache]
- Installing jsonlite ... OK [linked from cache]
- Installing memoise ... OK [linked from cache]
- Installing mime ... OK [linked from cache]
- Installing fs ... OK [linked from cache]
- Installing rappdirs ... OK [linked from cache]
- Installing sass ... OK [linked from cache]
- Installing bslib ... OK [linked from cache]
- Installing clipr ... OK [linked from cache]
- Installing crayon ... OK [linked from cache]
- Installing generics ... OK [linked from cache]
- Installing fansi ... OK [linked from cache]
- Installing utf8 ... OK [linked from cache]
- Installing pillar ... OK [linked from cache]
- Installing pkgconfig ... OK [linked from cache]
- Installing tibble ... OK [linked from cache]
- Installing dplyr ... OK [linked from cache]
- Installing evaluate ... OK [linked from cache]
- Installing fontawesome ... OK [linked from cache]
- Installing rprojroot ... OK [linked from cache]
- Installing here ... OK [linked from cache]
- Installing xfun ... OK [linked from cache]
- Installing highr ... OK [linked from cache]
- Installing hms ... OK [linked from cache]
- Installing yaml ... OK [linked from cache]
- Installing knitr ... OK [linked from cache]
- Installing prettyunits ... OK [linked from cache]
- Installing progress ... OK [linked from cache]
- Installing tzdb ... OK [linked from cache]
- Installing vroom ... OK [linked from cache]
- Installing readr ... OK [linked from cache]
- Installing stringi ... OK [linked from cache]
- Installing stringr ... OK [linked from cache]
- Installing tinytex ... OK [linked from cache]
- Installing rmarkdown ... OK [linked from cache]
library(here)
here() starts at C:/Users/Marcony1/OneDrive - Fundacion Universidad de las Americas Puebla/Documents/MDS/Block 6/DSCI 532/DSCI_532_individual-assignment_marcony1
library(dplyr)
Warning: package ‘dplyr’ was built under R version 4.3.3
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(readr)
Warning: package ‘readr’ was built under R version 4.3.3
library(arrow)
Warning: package ‘arrow’ was built under R version 4.3.3
Attaching package: ‘arrow’
The following object is masked from ‘package:utils’:
timestamp
Read data
# Locate the raw census archive and fail early with a clear error if it is
# absent, rather than failing later when the extracted CSVs cannot be read.
zip_file <- here("data", "raw", "iter_00_cpv2020_csv.zip")
if (!file.exists(zip_file)) {
  stop("Census archive not found: ", zip_file, call. = FALSE)
}

# Extract into a unique, session-scoped scratch directory. A fixed folder such
# as here("temp") could already exist, and the recursive unlink() performed
# after reading would then delete its unrelated contents.
temp_dir <- tempfile("iter_00_cpv2020_")
dir.create(temp_dir)
temp_dir <- normalizePath(temp_dir, winslash = "/")

# Archive members needed: the census table and its data dictionary.
data_member <- file.path(
  "iter_00_cpv2020", "conjunto_de_datos", "conjunto_de_datos_iter_00CSV20.csv"
)
dict_member <- file.path(
  "iter_00_cpv2020", "diccionario_datos", "diccionario_datos_iter_00CSV20.csv"
)
unzip(zip_file, files = c(data_member, dict_member), exdir = temp_dir)

# Full paths of the two extracted files.
data_path <- file.path(temp_dir, data_member)
dict_path <- file.path(temp_dir, dict_member)

# The dictionary is read as-is; its header is cleaned up further below.
info_dict <- read_csv(dict_path)
New names:Rows: 290 Columns: 10── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): ...1, ...2, ...3, ...4, ...5, ...6
lgl (4): ...7, ...8, ...9, ...10
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the full census table. Per the column specification printed below, only
# POBTOT, VIVTOT and TVIVHAB are guessed as numeric; the other 283 columns come
# in as character.
# NOTE(review): presumably some count columns contain non-numeric placeholder
# values that defeat type guessing -- confirm before doing arithmetic on them.
df <- read_csv(data_path)
Rows: 195662 Columns: 286── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (283): ENTIDAD, NOM_ENT, MUN, NOM_MUN, LOC, NOM_LOC, LONGITUD, LATITUD, ALTITUD, POBFEM, POBMAS, P_0A2, P_0A2_F, P_0A2_M, P_3YMAS, P_3YMAS_F, P_3Y...
dbl (3): POBTOT, VIVTOT, TVIVHAB
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Remove the scratch extraction directory together with everything inside it.
unlink(temp_dir, recursive = TRUE)

# Exporting dictionary file (unmodified, exactly as it was read).
info_dict |>
  write_csv(here("data", "raw", "diccionario_datos_iter_00CSV20.csv"))
Exploration
# Quick look at the first rows of the census table and of the raw dictionary,
# followed by the column-by-column structure of the census table.
head(df)
head(info_dict)
str(df)
spc_tbl_ [195,662 × 286] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ ENTIDAD : chr [1:195662] "00" "00" "00" "01" ...
$ NOM_ENT : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Aguascalientes" ...
$ MUN : chr [1:195662] "000" "000" "000" "000" ...
$ NOM_MUN : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Total de la entidad Aguascalientes" ...
$ LOC : chr [1:195662] "0000" "9998" "9999" "0000" ...
$ NOM_LOC : chr [1:195662] "Total nacional" "Localidades de una vivienda" "Localidades de dos viviendas" "Total de la Entidad" ...
$ LONGITUD : chr [1:195662] NA NA NA NA ...
$ LATITUD : chr [1:195662] NA NA NA NA ...
$ ALTITUD : chr [1:195662] NA NA NA NA ...
$ POBTOT : num [1:195662] 1.26e+08 2.50e+05 1.47e+05 1.43e+06 3.70e+03 ...
$ POBFEM : chr [1:195662] "64540634" "96869" "61324" "728924" ...
$ POBMAS : chr [1:195662] "61473390" "153485" "85801" "696683" ...
$ P_0A2 : chr [1:195662] "5764054" "10493" "6798" "71864" ...
$ P_0A2_F : chr [1:195662] "2848875" "5193" "3407" "35604" ...
$ P_0A2_M : chr [1:195662] "2915179" "5300" "3391" "36260" ...
$ P_3YMAS : chr [1:195662] "119976584" "239441" "139757" "1352235" ...
$ P_3YMAS_F : chr [1:195662] "61554567" "91463" "57628" "692561" ...
$ P_3YMAS_M : chr [1:195662] "58422017" "147978" "82129" "659674" ...
$ P_5YMAS : chr [1:195662] "115693273" "232086" "135028" "1299669" ...
$ P_5YMAS_F : chr [1:195662] "59433559" "87931" "55256" "666713" ...
$ P_5YMAS_M : chr [1:195662] "56259714" "144155" "79772" "632956" ...
$ P_12YMAS : chr [1:195662] "100528155" "207748" "119223" "1116719" ...
$ P_12YMAS_F : chr [1:195662] "51962264" "76111" "47543" "576593" ...
$ P_12YMAS_M : chr [1:195662] "48565891" "131637" "71680" "540126" ...
$ P_15YMAS : chr [1:195662] "93985354" "197411" "111530" "1038904" ...
$ P_15YMAS_F : chr [1:195662] "48732991" "71344" "44275" "538387" ...
$ P_15YMAS_M : chr [1:195662] "45252363" "126067" "67255" "500517" ...
$ P_18YMAS : chr [1:195662] "87492680" "186968" "104612" "960764" ...
$ P_18YMAS_F : chr [1:195662] "45530857" "66514" "41184" "500089" ...
$ P_18YMAS_M : chr [1:195662] "41961823" "120454" "63428" "460675" ...
$ P_3A5 : chr [1:195662] "6462212" "10900" "7028" "78833" ...
$ P_3A5_F : chr [1:195662] "3193548" "5270" "3511" "38679" ...
$ P_3A5_M : chr [1:195662] "3268664" "5630" "3517" "40154" ...
$ P_6A11 : chr [1:195662] "12986217" "20793" "13506" "156683" ...
$ P_6A11_F : chr [1:195662] "6398755" "10082" "6574" "77289" ...
$ P_6A11_M : chr [1:195662] "6587462" "10711" "6932" "79394" ...
$ P_8A14 : chr [1:195662] "15287375" "24342" "16724" "181905" ...
$ P_8A14_F : chr [1:195662] "7531118" "11538" "7679" "89383" ...
$ P_8A14_M : chr [1:195662] "7756257" "12804" "9045" "92522" ...
$ P_12A14 : chr [1:195662] "6542801" "10337" "7693" "77815" ...
$ P_12A14_F : chr [1:195662] "3229273" "4767" "3268" "38206" ...
$ P_12A14_M : chr [1:195662] "3313528" "5570" "4425" "39609" ...
$ P_15A17 : chr [1:195662] "6492674" "10443" "6918" "78140" ...
$ P_15A17_F : chr [1:195662] "3202134" "4830" "3091" "38298" ...
$ P_15A17_M : chr [1:195662] "3290540" "5613" "3827" "39842" ...
$ P_18A24 : chr [1:195662] "14736111" "27841" "16336" "180847" ...
$ P_18A24_F : chr [1:195662] "7398617" "11140" "6760" "90632" ...
$ P_18A24_M : chr [1:195662] "7337494" "16701" "9576" "90215" ...
$ P_15A49_F : chr [1:195662] "33885546" "47693" "29297" "388917" ...
$ P_60YMAS : chr [1:195662] "15142976" "37383" "21277" "145376" ...
$ P_60YMAS_F : chr [1:195662] "8139094" "13442" "8916" "78703" ...
$ P_60YMAS_M : chr [1:195662] "7003882" "23941" "12361" "66673" ...
$ REL_H_M : chr [1:195662] "95.25" "158.45" "139.91" "95.58" ...
$ POB0_14 : chr [1:195662] "31755284" "52523" "35025" "385195" ...
$ POB15_64 : chr [1:195662] "83663440" "171209" "96250" "941834" ...
$ POB65_MAS : chr [1:195662] "10321914" "26202" "15280" "97070" ...
$ P_0A4 : chr [1:195662] "10047365" "17848" "11527" "124430" ...
$ P_0A4_F : chr [1:195662] "4969883" "8725" "5779" "61452" ...
$ P_0A4_M : chr [1:195662] "5077482" "9123" "5748" "62978" ...
$ P_5A9 : chr [1:195662] "10764379" "17380" "11274" "131048" ...
$ P_5A9_F : chr [1:195662] "5311288" "8526" "5558" "64689" ...
$ P_5A9_M : chr [1:195662] "5453091" "8854" "5716" "66359" ...
$ P_10A14 : chr [1:195662] "10943540" "17295" "12224" "129717" ...
$ P_10A14_F : chr [1:195662] "5389280" "8061" "5423" "63637" ...
$ P_10A14_M : chr [1:195662] "5554260" "9234" "6801" "66080" ...
$ P_15A19 : chr [1:195662] "10806690" "18303" "11484" "131967" ...
$ P_15A19_F : chr [1:195662] "5344540" "8138" "5140" "65064" ...
$ P_15A19_M : chr [1:195662] "5462150" "10165" "6344" "66903" ...
$ P_20A24 : chr [1:195662] "10422095" "19981" "11770" "127020" ...
$ P_20A24_F : chr [1:195662] "5256211" "7832" "4711" "63866" ...
$ P_20A24_M : chr [1:195662] "5165884" "12149" "7059" "63154" ...
$ P_25A29 : chr [1:195662] "9993001" "20584" "12238" "118426" ...
$ P_25A29_F : chr [1:195662] "5131597" "7125" "4427" "60285" ...
$ P_25A29_M : chr [1:195662] "4861404" "13459" "7811" "58141" ...
$ P_30A34 : chr [1:195662] "9420827" "19601" "11315" "106825" ...
$ P_30A34_F : chr [1:195662] "4893101" "6309" "4074" "55174" ...
$ P_30A34_M : chr [1:195662] "4527726" "13292" "7241" "51651" ...
$ P_35A39 : chr [1:195662] "9020276" "18645" "10357" "99257" ...
$ P_35A39_F : chr [1:195662] "4688746" "6289" "3825" "51483" ...
$ P_35A39_M : chr [1:195662] "4331530" "12356" "6532" "47774" ...
$ P_40A44 : chr [1:195662] "8503586" "17934" "9705" "92378" ...
$ P_40A44_F : chr [1:195662] "4441282" "6060" "3743" "48539" ...
$ P_40A44_M : chr [1:195662] "4062304" "11874" "5962" "43839" ...
$ P_45A49 : chr [1:195662] "7942413" "16840" "8668" "84669" ...
$ P_45A49_F : chr [1:195662] "4130069" "5940" "3377" "44506" ...
$ P_45A49_M : chr [1:195662] "3812344" "10900" "5291" "40163" ...
$ P_50A54 : chr [1:195662] "7037532" "15070" "7878" "74121" ...
$ P_50A54_F : chr [1:195662] "3705369" "5481" "3239" "39510" ...
$ P_50A54_M : chr [1:195662] "3332163" "9589" "4639" "34611" ...
$ P_55A59 : chr [1:195662] "5695958" "13070" "6838" "58865" ...
$ P_55A59_F : chr [1:195662] "3002982" "4728" "2823" "31257" ...
$ P_55A59_M : chr [1:195662] "2692976" "8342" "4015" "27608" ...
$ P_60A64 : chr [1:195662] "4821062" "11181" "5997" "48306" ...
$ P_60A64_F : chr [1:195662] "2563200" "4050" "2511" "25871" ...
$ P_60A64_M : chr [1:195662] "2257862" "7131" "3486" "22435" ...
$ P_65A69 : chr [1:195662] "3645077" "9160" "5052" "35823" ...
$ P_65A69_F : chr [1:195662] "1938227" "3343" "2130" "19125" ...
$ P_65A69_M : chr [1:195662] "1706850" "5817" "2922" "16698" ...
$ P_70A74 : chr [1:195662] "2647340" "6903" "3852" "25586" ...
[list output truncated]
- attr(*, "spec")=
.. cols(
.. ENTIDAD = col_character(),
.. NOM_ENT = col_character(),
.. MUN = col_character(),
.. NOM_MUN = col_character(),
.. LOC = col_character(),
.. NOM_LOC = col_character(),
.. LONGITUD = col_character(),
.. LATITUD = col_character(),
.. ALTITUD = col_character(),
.. POBTOT = col_double(),
.. POBFEM = col_character(),
.. POBMAS = col_character(),
.. P_0A2 = col_character(),
.. P_0A2_F = col_character(),
.. P_0A2_M = col_character(),
.. P_3YMAS = col_character(),
.. P_3YMAS_F = col_character(),
.. P_3YMAS_M = col_character(),
.. P_5YMAS = col_character(),
.. P_5YMAS_F = col_character(),
.. P_5YMAS_M = col_character(),
.. P_12YMAS = col_character(),
.. P_12YMAS_F = col_character(),
.. P_12YMAS_M = col_character(),
.. P_15YMAS = col_character(),
.. P_15YMAS_F = col_character(),
.. P_15YMAS_M = col_character(),
.. P_18YMAS = col_character(),
.. P_18YMAS_F = col_character(),
.. P_18YMAS_M = col_character(),
.. P_3A5 = col_character(),
.. P_3A5_F = col_character(),
.. P_3A5_M = col_character(),
.. P_6A11 = col_character(),
.. P_6A11_F = col_character(),
.. P_6A11_M = col_character(),
.. P_8A14 = col_character(),
.. P_8A14_F = col_character(),
.. P_8A14_M = col_character(),
.. P_12A14 = col_character(),
.. P_12A14_F = col_character(),
.. P_12A14_M = col_character(),
.. P_15A17 = col_character(),
.. P_15A17_F = col_character(),
.. P_15A17_M = col_character(),
.. P_18A24 = col_character(),
.. P_18A24_F = col_character(),
.. P_18A24_M = col_character(),
.. P_15A49_F = col_character(),
.. P_60YMAS = col_character(),
.. P_60YMAS_F = col_character(),
.. P_60YMAS_M = col_character(),
.. REL_H_M = col_character(),
.. POB0_14 = col_character(),
.. POB15_64 = col_character(),
.. POB65_MAS = col_character(),
.. P_0A4 = col_character(),
.. P_0A4_F = col_character(),
.. P_0A4_M = col_character(),
.. P_5A9 = col_character(),
.. P_5A9_F = col_character(),
.. P_5A9_M = col_character(),
.. P_10A14 = col_character(),
.. P_10A14_F = col_character(),
.. P_10A14_M = col_character(),
.. P_15A19 = col_character(),
.. P_15A19_F = col_character(),
.. P_15A19_M = col_character(),
.. P_20A24 = col_character(),
.. P_20A24_F = col_character(),
.. P_20A24_M = col_character(),
.. P_25A29 = col_character(),
.. P_25A29_F = col_character(),
.. P_25A29_M = col_character(),
.. P_30A34 = col_character(),
.. P_30A34_F = col_character(),
.. P_30A34_M = col_character(),
.. P_35A39 = col_character(),
.. P_35A39_F = col_character(),
.. P_35A39_M = col_character(),
.. P_40A44 = col_character(),
.. P_40A44_F = col_character(),
.. P_40A44_M = col_character(),
.. P_45A49 = col_character(),
.. P_45A49_F = col_character(),
.. P_45A49_M = col_character(),
.. P_50A54 = col_character(),
.. P_50A54_F = col_character(),
.. P_50A54_M = col_character(),
.. P_55A59 = col_character(),
.. P_55A59_F = col_character(),
.. P_55A59_M = col_character(),
.. P_60A64 = col_character(),
.. P_60A64_F = col_character(),
.. P_60A64_M = col_character(),
.. P_65A69 = col_character(),
.. P_65A69_F = col_character(),
.. P_65A69_M = col_character(),
.. P_70A74 = col_character(),
.. P_70A74_F = col_character(),
.. P_70A74_M = col_character(),
.. P_75A79 = col_character(),
.. P_75A79_F = col_character(),
.. P_75A79_M = col_character(),
.. P_80A84 = col_character(),
.. P_80A84_F = col_character(),
.. P_80A84_M = col_character(),
.. P_85YMAS = col_character(),
.. P_85YMAS_F = col_character(),
.. P_85YMAS_M = col_character(),
.. PROM_HNV = col_character(),
.. PNACENT = col_character(),
.. PNACENT_F = col_character(),
.. PNACENT_M = col_character(),
.. PNACOE = col_character(),
.. PNACOE_F = col_character(),
.. PNACOE_M = col_character(),
.. PRES2015 = col_character(),
.. PRES2015_F = col_character(),
.. PRES2015_M = col_character(),
.. PRESOE15 = col_character(),
.. PRESOE15_F = col_character(),
.. PRESOE15_M = col_character(),
.. P3YM_HLI = col_character(),
.. P3YM_HLI_F = col_character(),
.. P3YM_HLI_M = col_character(),
.. P3HLINHE = col_character(),
.. P3HLINHE_F = col_character(),
.. P3HLINHE_M = col_character(),
.. P3HLI_HE = col_character(),
.. P3HLI_HE_F = col_character(),
.. P3HLI_HE_M = col_character(),
.. P5_HLI = col_character(),
.. P5_HLI_NHE = col_character(),
.. P5_HLI_HE = col_character(),
.. PHOG_IND = col_character(),
.. POB_AFRO = col_character(),
.. POB_AFRO_F = col_character(),
.. POB_AFRO_M = col_character(),
.. PCON_DISC = col_character(),
.. PCDISC_MOT = col_character(),
.. PCDISC_VIS = col_character(),
.. PCDISC_LENG = col_character(),
.. PCDISC_AUD = col_character(),
.. PCDISC_MOT2 = col_character(),
.. PCDISC_MEN = col_character(),
.. PCON_LIMI = col_character(),
.. PCLIM_CSB = col_character(),
.. PCLIM_VIS = col_character(),
.. PCLIM_HACO = col_character(),
.. PCLIM_OAUD = col_character(),
.. PCLIM_MOT2 = col_character(),
.. PCLIM_RE_CO = col_character(),
.. PCLIM_PMEN = col_character(),
.. PSIND_LIM = col_character(),
.. P3A5_NOA = col_character(),
.. P3A5_NOA_F = col_character(),
.. P3A5_NOA_M = col_character(),
.. P6A11_NOA = col_character(),
.. P6A11_NOAF = col_character(),
.. P6A11_NOAM = col_character(),
.. P12A14NOA = col_character(),
.. P12A14NOAF = col_character(),
.. P12A14NOAM = col_character(),
.. P15A17A = col_character(),
.. P15A17A_F = col_character(),
.. P15A17A_M = col_character(),
.. P18A24A = col_character(),
.. P18A24A_F = col_character(),
.. P18A24A_M = col_character(),
.. P8A14AN = col_character(),
.. P8A14AN_F = col_character(),
.. P8A14AN_M = col_character(),
.. P15YM_AN = col_character(),
.. P15YM_AN_F = col_character(),
.. P15YM_AN_M = col_character(),
.. P15YM_SE = col_character(),
.. P15YM_SE_F = col_character(),
.. P15YM_SE_M = col_character(),
.. P15PRI_IN = col_character(),
.. P15PRI_INF = col_character(),
.. P15PRI_INM = col_character(),
.. P15PRI_CO = col_character(),
.. P15PRI_COF = col_character(),
.. P15PRI_COM = col_character(),
.. P15SEC_IN = col_character(),
.. P15SEC_INF = col_character(),
.. P15SEC_INM = col_character(),
.. P15SEC_CO = col_character(),
.. P15SEC_COF = col_character(),
.. P15SEC_COM = col_character(),
.. P18YM_PB = col_character(),
.. P18YM_PB_F = col_character(),
.. P18YM_PB_M = col_character(),
.. GRAPROES = col_character(),
.. GRAPROES_F = col_character(),
.. GRAPROES_M = col_character(),
.. PEA = col_character(),
.. PEA_F = col_character(),
.. PEA_M = col_character(),
.. PE_INAC = col_character(),
.. PE_INAC_F = col_character(),
.. PE_INAC_M = col_character(),
.. POCUPADA = col_character(),
.. POCUPADA_F = col_character(),
.. POCUPADA_M = col_character(),
.. PDESOCUP = col_character(),
.. PDESOCUP_F = col_character(),
.. PDESOCUP_M = col_character(),
.. PSINDER = col_character(),
.. PDER_SS = col_character(),
.. PDER_IMSS = col_character(),
.. PDER_ISTE = col_character(),
.. PDER_ISTEE = col_character(),
.. PAFIL_PDOM = col_character(),
.. PDER_SEGP = col_character(),
.. PDER_IMSSB = col_character(),
.. PAFIL_IPRIV = col_character(),
.. PAFIL_OTRAI = col_character(),
.. P12YM_SOLT = col_character(),
.. P12YM_CASA = col_character(),
.. P12YM_SEPA = col_character(),
.. PCATOLICA = col_character(),
.. PRO_CRIEVA = col_character(),
.. POTRAS_REL = col_character(),
.. PSIN_RELIG = col_character(),
.. TOTHOG = col_character(),
.. HOGJEF_F = col_character(),
.. HOGJEF_M = col_character(),
.. POBHOG = col_character(),
.. PHOGJEF_F = col_character(),
.. PHOGJEF_M = col_character(),
.. VIVTOT = col_double(),
.. TVIVHAB = col_double(),
.. TVIVPAR = col_character(),
.. VIVPAR_HAB = col_character(),
.. VIVPARH_CV = col_character(),
.. TVIVPARHAB = col_character(),
.. VIVPAR_DES = col_character(),
.. VIVPAR_UT = col_character(),
.. OCUPVIVPAR = col_character(),
.. PROM_OCUP = col_character(),
.. PRO_OCUP_C = col_character(),
.. VPH_PISODT = col_character(),
.. VPH_PISOTI = col_character(),
.. VPH_1DOR = col_character(),
.. VPH_2YMASD = col_character(),
.. VPH_1CUART = col_character(),
.. VPH_2CUART = col_character(),
.. VPH_3YMASC = col_character(),
.. VPH_C_ELEC = col_character(),
.. VPH_S_ELEC = col_character(),
.. VPH_AGUADV = col_character(),
.. VPH_AEASP = col_character(),
.. VPH_AGUAFV = col_character(),
.. VPH_TINACO = col_character(),
.. VPH_CISTER = col_character(),
.. VPH_EXCSA = col_character(),
.. VPH_LETR = col_character(),
.. VPH_DRENAJ = col_character(),
.. VPH_NODREN = col_character(),
.. VPH_C_SERV = col_character(),
.. VPH_NDEAED = col_character(),
.. VPH_DSADMA = col_character(),
.. VPH_NDACMM = col_character(),
.. VPH_SNBIEN = col_character(),
.. VPH_REFRI = col_character(),
.. VPH_LAVAD = col_character(),
.. VPH_HMICRO = col_character(),
.. VPH_AUTOM = col_character(),
.. VPH_MOTO = col_character(),
.. VPH_BICI = col_character(),
.. VPH_RADIO = col_character(),
.. VPH_TV = col_character(),
.. VPH_PC = col_character(),
.. VPH_TELEF = col_character(),
.. VPH_CEL = col_character(),
.. VPH_INTER = col_character(),
.. VPH_STVP = col_character(),
.. VPH_SPMVPI = col_character(),
.. VPH_CVJ = col_character(),
.. VPH_SINRTV = col_character(),
.. VPH_SINLTC = col_character(),
.. VPH_SINCINT = col_character(),
.. VPH_SINTIC = col_character(),
.. TAMLOC = col_character()
.. )
- attr(*, "problems")=<externalptr>
# Tidy the raw dictionary: drop the three rows above the header row and the
# four trailing columns (7-10, parsed as logical and never used afterwards).
clean_info_dict <- info_dict[-c(1:3), -c(7:10)]

# Promote the first remaining row to column names. `names<-` on a tibble needs
# a plain character vector; assigning the one-row tibble itself is what
# produced the tibble 3.0.0 warnings about empty / non-character names.
header_row <- unlist(clean_info_dict[1, ], use.names = FALSE)
names(clean_info_dict) <- as.character(header_row)

# The header row is now redundant as data.
clean_info_dict <- clean_info_dict[-1, ]
clean_info_dict
# Distinct entity (state) names, in order of first appearance in the census
# table.
unique_states <- df |>
  distinct(NOM_ENT)

# Make sure the output folder exists so the export also works on a fresh
# checkout where data/processed has not been created yet.
processed_dir <- here("data", "processed")
dir.create(processed_dir, recursive = TRUE, showWarnings = FALSE)
entity_names_path <- file.path(processed_dir, "entity_names.csv")
write_csv(unique_states, entity_names_path)
unique_states

# Read the exported file back as a plain character vector; the column is named
# explicitly instead of relying on pull()'s default of "last column".
entities_csv <- read_csv(entity_names_path) |>
  pull(NOM_ENT)
Rows: 33 Columns: 1── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): NOM_ENT
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the entity names as read back from the exported CSV.
print(entities_csv)
[1] "Total nacional" "Aguascalientes" "Baja California" "Baja California Sur"
[5] "Campeche" "Coahuila de Zaragoza" "Colima" "Chiapas"
[9] "Chihuahua" "Ciudad de México" "Durango" "Guanajuato"
[13] "Guerrero" "Hidalgo" "Jalisco" "México"
[17] "Michoacán de Ocampo" "Morelos" "Nayarit" "Nuevo León"
[21] "Oaxaca" "Puebla" "Querétaro" "Quintana Roo"
[25] "San Luis Potosí" "Sinaloa" "Sonora" "Tabasco"
[29] "Tamaulipas" "Tlaxcala" "Veracruz de Ignacio de la Llave" "Yucatán"
[33] "Zacatecas"
Selecting the variables (dictionary rows) that we’ll analyze
# Positions (rows of the cleaned dictionary) of the variables kept for
# analysis.
rows_to_include <- c(1:12, 53:132, 136:140, 147, 155:211, 220:232)

# Keep those dictionary rows and extract the 4th column, which holds the
# variable mnemonics used as column names in the census table.
filtered_data <- clean_info_dict |>
  slice(rows_to_include) |>
  pull(4)
filtered_data
[1] "ENTIDAD" "NOM_ENT" "MUN" "NOM_MUN" "LOC" "NOM_LOC" "LONGITUD" "LATITUD" "ALTITUD" "POBTOT" "POBFEM"
[12] "POBMAS" "REL_H_M" "POB0_14" "POB15_64" "POB65_MAS" "P_0A4" "P_0A4_F" "P_0A4_M" "P_5A9" "P_5A9_F" "P_5A9_M"
[23] "P_10A14" "P_10A14_F" "P_10A14_M" "P_15A19" "P_15A19_F" "P_15A19_M" "P_20A24" "P_20A24_F" "P_20A24_M" "P_25A29" "P_25A29_F"
[34] "P_25A29_M" "P_30A34" "P_30A34_F" "P_30A34_M" "P_35A39" "P_35A39_F" "P_35A39_M" "P_40A44" "P_40A44_F" "P_40A44_M" "P_45A49"
[45] "P_45A49_F" "P_45A49_M" "P_50A54" "P_50A54_F" "P_50A54_M" "P_55A59" "P_55A59_F" "P_55A59_M" "P_60A64" "P_60A64_F" "P_60A64_M"
[56] "P_65A69" "P_65A69_F" "P_65A69_M" "P_70A74" "P_70A74_F" "P_70A74_M" "P_75A79" "P_75A79_F" "P_75A79_M" "P_80A84" "P_80A84_F"
[67] "P_80A84_M" "P_85YMAS" "P_85YMAS_F" "P_85YMAS_M" "PROM_HNV" "PNACENT" "PNACENT_F" "PNACENT_M" "PNACOE" "PNACOE_F" "PNACOE_M"
[78] "PRES2015" "PRES2015_F" "PRES2015_M" "PRESOE15" "PRESOE15_F" "PRESOE15_M" "P3YM_HLI" "P3YM_HLI_F" "P3YM_HLI_M" "P3HLINHE" "P3HLINHE_F"
[89] "P3HLINHE_M" "P3HLI_HE" "P3HLI_HE_F" "P3HLI_HE_M" "PHOG_IND" "POB_AFRO" "POB_AFRO_F" "POB_AFRO_M" "PCON_DISC" "PCON_LIMI" "PSIND_LIM"
[100] "P3A5_NOA" "P3A5_NOA_F" "P3A5_NOA_M" "P6A11_NOA" "P6A11_NOAF" "P6A11_NOAM" "P12A14NOA" "P12A14NOAF" "P12A14NOAM" "P15A17A" "P15A17A_F"
[111] "P15A17A_M" "P18A24A" "P18A24A_F" "P18A24A_M" "P8A14AN" "P8A14AN_F" "P8A14AN_M" "P15YM_AN" "P15YM_AN_F" "P15YM_AN_M" "P15YM_SE"
[122] "P15YM_SE_F" "P15YM_SE_M" "P15PRI_IN" "P15PRI_INF" "P15PRI_INM" "P15PRI_CO" "P15PRI_COF" "P15PRI_COM" "P15SEC_IN" "P15SEC_INF" "P15SEC_INM"
[133] "P15SEC_CO" "P15SEC_COF" "P15SEC_COM" "P18YM_PB" "P18YM_PB_F" "P18YM_PB_M" "GRAPROES" "GRAPROES_F" "GRAPROES_M" "PEA" "PEA_F"
[144] "PEA_M" "PE_INAC" "PE_INAC_F" "PE_INAC_M" "POCUPADA" "POCUPADA_F" "POCUPADA_M" "PDESOCUP" "PDESOCUP_F" "PDESOCUP_M" "PSINDER"
[155] "PDER_SS" "P12YM_SOLT" "P12YM_CASA" "P12YM_SEPA" "PCATOLICA" "PRO_CRIEVA" "POTRAS_REL" "PSIN_RELIG" "TOTHOG" "HOGJEF_F" "HOGJEF_M"
[166] "POBHOG" "PHOGJEF_F" "PHOGJEF_M"
# Keep only the chosen census columns. `filtered_data` is a character vector
# living outside the data frame, so it is wrapped in all_of(); passing it bare
# is deprecated since tidyselect 1.1.0 and triggers a warning.
selected_df <- df |>
  select(all_of(filtered_data))
# Display the reduced census table.
selected_df
EDA before exporting
# NOTE(review): this inspects the full `df` again, exactly as in the
# Exploration section; presumably `str(selected_df)` was intended here --
# confirm.
str(df)
spc_tbl_ [195,662 × 286] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ ENTIDAD : chr [1:195662] "00" "00" "00" "01" ...
$ NOM_ENT : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Aguascalientes" ...
$ MUN : chr [1:195662] "000" "000" "000" "000" ...
$ NOM_MUN : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Total de la entidad Aguascalientes" ...
$ LOC : chr [1:195662] "0000" "9998" "9999" "0000" ...
$ NOM_LOC : chr [1:195662] "Total nacional" "Localidades de una vivienda" "Localidades de dos viviendas" "Total de la Entidad" ...
$ LONGITUD : chr [1:195662] NA NA NA NA ...
$ LATITUD : chr [1:195662] NA NA NA NA ...
$ ALTITUD : chr [1:195662] NA NA NA NA ...
$ POBTOT : num [1:195662] 1.26e+08 2.50e+05 1.47e+05 1.43e+06 3.70e+03 ...
$ POBFEM : chr [1:195662] "64540634" "96869" "61324" "728924" ...
$ POBMAS : chr [1:195662] "61473390" "153485" "85801" "696683" ...
$ P_0A2 : chr [1:195662] "5764054" "10493" "6798" "71864" ...
$ P_0A2_F : chr [1:195662] "2848875" "5193" "3407" "35604" ...
$ P_0A2_M : chr [1:195662] "2915179" "5300" "3391" "36260" ...
$ P_3YMAS : chr [1:195662] "119976584" "239441" "139757" "1352235" ...
$ P_3YMAS_F : chr [1:195662] "61554567" "91463" "57628" "692561" ...
$ P_3YMAS_M : chr [1:195662] "58422017" "147978" "82129" "659674" ...
$ P_5YMAS : chr [1:195662] "115693273" "232086" "135028" "1299669" ...
$ P_5YMAS_F : chr [1:195662] "59433559" "87931" "55256" "666713" ...
$ P_5YMAS_M : chr [1:195662] "56259714" "144155" "79772" "632956" ...
$ P_12YMAS : chr [1:195662] "100528155" "207748" "119223" "1116719" ...
$ P_12YMAS_F : chr [1:195662] "51962264" "76111" "47543" "576593" ...
$ P_12YMAS_M : chr [1:195662] "48565891" "131637" "71680" "540126" ...
$ P_15YMAS : chr [1:195662] "93985354" "197411" "111530" "1038904" ...
$ P_15YMAS_F : chr [1:195662] "48732991" "71344" "44275" "538387" ...
$ P_15YMAS_M : chr [1:195662] "45252363" "126067" "67255" "500517" ...
$ P_18YMAS : chr [1:195662] "87492680" "186968" "104612" "960764" ...
$ P_18YMAS_F : chr [1:195662] "45530857" "66514" "41184" "500089" ...
$ P_18YMAS_M : chr [1:195662] "41961823" "120454" "63428" "460675" ...
$ P_3A5 : chr [1:195662] "6462212" "10900" "7028" "78833" ...
$ P_3A5_F : chr [1:195662] "3193548" "5270" "3511" "38679" ...
$ P_3A5_M : chr [1:195662] "3268664" "5630" "3517" "40154" ...
$ P_6A11 : chr [1:195662] "12986217" "20793" "13506" "156683" ...
$ P_6A11_F : chr [1:195662] "6398755" "10082" "6574" "77289" ...
$ P_6A11_M : chr [1:195662] "6587462" "10711" "6932" "79394" ...
$ P_8A14 : chr [1:195662] "15287375" "24342" "16724" "181905" ...
$ P_8A14_F : chr [1:195662] "7531118" "11538" "7679" "89383" ...
$ P_8A14_M : chr [1:195662] "7756257" "12804" "9045" "92522" ...
$ P_12A14 : chr [1:195662] "6542801" "10337" "7693" "77815" ...
$ P_12A14_F : chr [1:195662] "3229273" "4767" "3268" "38206" ...
$ P_12A14_M : chr [1:195662] "3313528" "5570" "4425" "39609" ...
$ P_15A17 : chr [1:195662] "6492674" "10443" "6918" "78140" ...
$ P_15A17_F : chr [1:195662] "3202134" "4830" "3091" "38298" ...
$ P_15A17_M : chr [1:195662] "3290540" "5613" "3827" "39842" ...
$ P_18A24 : chr [1:195662] "14736111" "27841" "16336" "180847" ...
$ P_18A24_F : chr [1:195662] "7398617" "11140" "6760" "90632" ...
$ P_18A24_M : chr [1:195662] "7337494" "16701" "9576" "90215" ...
$ P_15A49_F : chr [1:195662] "33885546" "47693" "29297" "388917" ...
$ P_60YMAS : chr [1:195662] "15142976" "37383" "21277" "145376" ...
$ P_60YMAS_F : chr [1:195662] "8139094" "13442" "8916" "78703" ...
$ P_60YMAS_M : chr [1:195662] "7003882" "23941" "12361" "66673" ...
$ REL_H_M : chr [1:195662] "95.25" "158.45" "139.91" "95.58" ...
$ POB0_14 : chr [1:195662] "31755284" "52523" "35025" "385195" ...
$ POB15_64 : chr [1:195662] "83663440" "171209" "96250" "941834" ...
$ POB65_MAS : chr [1:195662] "10321914" "26202" "15280" "97070" ...
$ P_0A4 : chr [1:195662] "10047365" "17848" "11527" "124430" ...
$ P_0A4_F : chr [1:195662] "4969883" "8725" "5779" "61452" ...
$ P_0A4_M : chr [1:195662] "5077482" "9123" "5748" "62978" ...
$ P_5A9 : chr [1:195662] "10764379" "17380" "11274" "131048" ...
$ P_5A9_F : chr [1:195662] "5311288" "8526" "5558" "64689" ...
$ P_5A9_M : chr [1:195662] "5453091" "8854" "5716" "66359" ...
$ P_10A14 : chr [1:195662] "10943540" "17295" "12224" "129717" ...
$ P_10A14_F : chr [1:195662] "5389280" "8061" "5423" "63637" ...
$ P_10A14_M : chr [1:195662] "5554260" "9234" "6801" "66080" ...
$ P_15A19 : chr [1:195662] "10806690" "18303" "11484" "131967" ...
$ P_15A19_F : chr [1:195662] "5344540" "8138" "5140" "65064" ...
$ P_15A19_M : chr [1:195662] "5462150" "10165" "6344" "66903" ...
$ P_20A24 : chr [1:195662] "10422095" "19981" "11770" "127020" ...
$ P_20A24_F : chr [1:195662] "5256211" "7832" "4711" "63866" ...
$ P_20A24_M : chr [1:195662] "5165884" "12149" "7059" "63154" ...
$ P_25A29 : chr [1:195662] "9993001" "20584" "12238" "118426" ...
$ P_25A29_F : chr [1:195662] "5131597" "7125" "4427" "60285" ...
$ P_25A29_M : chr [1:195662] "4861404" "13459" "7811" "58141" ...
$ P_30A34 : chr [1:195662] "9420827" "19601" "11315" "106825" ...
$ P_30A34_F : chr [1:195662] "4893101" "6309" "4074" "55174" ...
$ P_30A34_M : chr [1:195662] "4527726" "13292" "7241" "51651" ...
$ P_35A39 : chr [1:195662] "9020276" "18645" "10357" "99257" ...
$ P_35A39_F : chr [1:195662] "4688746" "6289" "3825" "51483" ...
$ P_35A39_M : chr [1:195662] "4331530" "12356" "6532" "47774" ...
$ P_40A44 : chr [1:195662] "8503586" "17934" "9705" "92378" ...
$ P_40A44_F : chr [1:195662] "4441282" "6060" "3743" "48539" ...
$ P_40A44_M : chr [1:195662] "4062304" "11874" "5962" "43839" ...
$ P_45A49 : chr [1:195662] "7942413" "16840" "8668" "84669" ...
$ P_45A49_F : chr [1:195662] "4130069" "5940" "3377" "44506" ...
$ P_45A49_M : chr [1:195662] "3812344" "10900" "5291" "40163" ...
$ P_50A54 : chr [1:195662] "7037532" "15070" "7878" "74121" ...
$ P_50A54_F : chr [1:195662] "3705369" "5481" "3239" "39510" ...
$ P_50A54_M : chr [1:195662] "3332163" "9589" "4639" "34611" ...
$ P_55A59 : chr [1:195662] "5695958" "13070" "6838" "58865" ...
$ P_55A59_F : chr [1:195662] "3002982" "4728" "2823" "31257" ...
$ P_55A59_M : chr [1:195662] "2692976" "8342" "4015" "27608" ...
$ P_60A64 : chr [1:195662] "4821062" "11181" "5997" "48306" ...
$ P_60A64_F : chr [1:195662] "2563200" "4050" "2511" "25871" ...
$ P_60A64_M : chr [1:195662] "2257862" "7131" "3486" "22435" ...
$ P_65A69 : chr [1:195662] "3645077" "9160" "5052" "35823" ...
$ P_65A69_F : chr [1:195662] "1938227" "3343" "2130" "19125" ...
$ P_65A69_M : chr [1:195662] "1706850" "5817" "2922" "16698" ...
$ P_70A74 : chr [1:195662] "2647340" "6903" "3852" "25586" ...
[list output truncated]
- attr(*, "spec")=
.. cols(
.. ENTIDAD = col_character(),
.. NOM_ENT = col_character(),
.. MUN = col_character(),
.. NOM_MUN = col_character(),
.. LOC = col_character(),
.. NOM_LOC = col_character(),
.. LONGITUD = col_character(),
.. LATITUD = col_character(),
.. ALTITUD = col_character(),
.. POBTOT = col_double(),
.. POBFEM = col_character(),
.. POBMAS = col_character(),
.. P_0A2 = col_character(),
.. P_0A2_F = col_character(),
.. P_0A2_M = col_character(),
.. P_3YMAS = col_character(),
.. P_3YMAS_F = col_character(),
.. P_3YMAS_M = col_character(),
.. P_5YMAS = col_character(),
.. P_5YMAS_F = col_character(),
.. P_5YMAS_M = col_character(),
.. P_12YMAS = col_character(),
.. P_12YMAS_F = col_character(),
.. P_12YMAS_M = col_character(),
.. P_15YMAS = col_character(),
.. P_15YMAS_F = col_character(),
.. P_15YMAS_M = col_character(),
.. P_18YMAS = col_character(),
.. P_18YMAS_F = col_character(),
.. P_18YMAS_M = col_character(),
.. P_3A5 = col_character(),
.. P_3A5_F = col_character(),
.. P_3A5_M = col_character(),
.. P_6A11 = col_character(),
.. P_6A11_F = col_character(),
.. P_6A11_M = col_character(),
.. P_8A14 = col_character(),
.. P_8A14_F = col_character(),
.. P_8A14_M = col_character(),
.. P_12A14 = col_character(),
.. P_12A14_F = col_character(),
.. P_12A14_M = col_character(),
.. P_15A17 = col_character(),
.. P_15A17_F = col_character(),
.. P_15A17_M = col_character(),
.. P_18A24 = col_character(),
.. P_18A24_F = col_character(),
.. P_18A24_M = col_character(),
.. P_15A49_F = col_character(),
.. P_60YMAS = col_character(),
.. P_60YMAS_F = col_character(),
.. P_60YMAS_M = col_character(),
.. REL_H_M = col_character(),
.. POB0_14 = col_character(),
.. POB15_64 = col_character(),
.. POB65_MAS = col_character(),
.. P_0A4 = col_character(),
.. P_0A4_F = col_character(),
.. P_0A4_M = col_character(),
.. P_5A9 = col_character(),
.. P_5A9_F = col_character(),
.. P_5A9_M = col_character(),
.. P_10A14 = col_character(),
.. P_10A14_F = col_character(),
.. P_10A14_M = col_character(),
.. P_15A19 = col_character(),
.. P_15A19_F = col_character(),
.. P_15A19_M = col_character(),
.. P_20A24 = col_character(),
.. P_20A24_F = col_character(),
.. P_20A24_M = col_character(),
.. P_25A29 = col_character(),
.. P_25A29_F = col_character(),
.. P_25A29_M = col_character(),
.. P_30A34 = col_character(),
.. P_30A34_F = col_character(),
.. P_30A34_M = col_character(),
.. P_35A39 = col_character(),
.. P_35A39_F = col_character(),
.. P_35A39_M = col_character(),
.. P_40A44 = col_character(),
.. P_40A44_F = col_character(),
.. P_40A44_M = col_character(),
.. P_45A49 = col_character(),
.. P_45A49_F = col_character(),
.. P_45A49_M = col_character(),
.. P_50A54 = col_character(),
.. P_50A54_F = col_character(),
.. P_50A54_M = col_character(),
.. P_55A59 = col_character(),
.. P_55A59_F = col_character(),
.. P_55A59_M = col_character(),
.. P_60A64 = col_character(),
.. P_60A64_F = col_character(),
.. P_60A64_M = col_character(),
.. P_65A69 = col_character(),
.. P_65A69_F = col_character(),
.. P_65A69_M = col_character(),
.. P_70A74 = col_character(),
.. P_70A74_F = col_character(),
.. P_70A74_M = col_character(),
.. P_75A79 = col_character(),
.. P_75A79_F = col_character(),
.. P_75A79_M = col_character(),
.. P_80A84 = col_character(),
.. P_80A84_F = col_character(),
.. P_80A84_M = col_character(),
.. P_85YMAS = col_character(),
.. P_85YMAS_F = col_character(),
.. P_85YMAS_M = col_character(),
.. PROM_HNV = col_character(),
.. PNACENT = col_character(),
.. PNACENT_F = col_character(),
.. PNACENT_M = col_character(),
.. PNACOE = col_character(),
.. PNACOE_F = col_character(),
.. PNACOE_M = col_character(),
.. PRES2015 = col_character(),
.. PRES2015_F = col_character(),
.. PRES2015_M = col_character(),
.. PRESOE15 = col_character(),
.. PRESOE15_F = col_character(),
.. PRESOE15_M = col_character(),
.. P3YM_HLI = col_character(),
.. P3YM_HLI_F = col_character(),
.. P3YM_HLI_M = col_character(),
.. P3HLINHE = col_character(),
.. P3HLINHE_F = col_character(),
.. P3HLINHE_M = col_character(),
.. P3HLI_HE = col_character(),
.. P3HLI_HE_F = col_character(),
.. P3HLI_HE_M = col_character(),
.. P5_HLI = col_character(),
.. P5_HLI_NHE = col_character(),
.. P5_HLI_HE = col_character(),
.. PHOG_IND = col_character(),
.. POB_AFRO = col_character(),
.. POB_AFRO_F = col_character(),
.. POB_AFRO_M = col_character(),
.. PCON_DISC = col_character(),
.. PCDISC_MOT = col_character(),
.. PCDISC_VIS = col_character(),
.. PCDISC_LENG = col_character(),
.. PCDISC_AUD = col_character(),
.. PCDISC_MOT2 = col_character(),
.. PCDISC_MEN = col_character(),
.. PCON_LIMI = col_character(),
.. PCLIM_CSB = col_character(),
.. PCLIM_VIS = col_character(),
.. PCLIM_HACO = col_character(),
.. PCLIM_OAUD = col_character(),
.. PCLIM_MOT2 = col_character(),
.. PCLIM_RE_CO = col_character(),
.. PCLIM_PMEN = col_character(),
.. PSIND_LIM = col_character(),
.. P3A5_NOA = col_character(),
.. P3A5_NOA_F = col_character(),
.. P3A5_NOA_M = col_character(),
.. P6A11_NOA = col_character(),
.. P6A11_NOAF = col_character(),
.. P6A11_NOAM = col_character(),
.. P12A14NOA = col_character(),
.. P12A14NOAF = col_character(),
.. P12A14NOAM = col_character(),
.. P15A17A = col_character(),
.. P15A17A_F = col_character(),
.. P15A17A_M = col_character(),
.. P18A24A = col_character(),
.. P18A24A_F = col_character(),
.. P18A24A_M = col_character(),
.. P8A14AN = col_character(),
.. P8A14AN_F = col_character(),
.. P8A14AN_M = col_character(),
.. P15YM_AN = col_character(),
.. P15YM_AN_F = col_character(),
.. P15YM_AN_M = col_character(),
.. P15YM_SE = col_character(),
.. P15YM_SE_F = col_character(),
.. P15YM_SE_M = col_character(),
.. P15PRI_IN = col_character(),
.. P15PRI_INF = col_character(),
.. P15PRI_INM = col_character(),
.. P15PRI_CO = col_character(),
.. P15PRI_COF = col_character(),
.. P15PRI_COM = col_character(),
.. P15SEC_IN = col_character(),
.. P15SEC_INF = col_character(),
.. P15SEC_INM = col_character(),
.. P15SEC_CO = col_character(),
.. P15SEC_COF = col_character(),
.. P15SEC_COM = col_character(),
.. P18YM_PB = col_character(),
.. P18YM_PB_F = col_character(),
.. P18YM_PB_M = col_character(),
.. GRAPROES = col_character(),
.. GRAPROES_F = col_character(),
.. GRAPROES_M = col_character(),
.. PEA = col_character(),
.. PEA_F = col_character(),
.. PEA_M = col_character(),
.. PE_INAC = col_character(),
.. PE_INAC_F = col_character(),
.. PE_INAC_M = col_character(),
.. POCUPADA = col_character(),
.. POCUPADA_F = col_character(),
.. POCUPADA_M = col_character(),
.. PDESOCUP = col_character(),
.. PDESOCUP_F = col_character(),
.. PDESOCUP_M = col_character(),
.. PSINDER = col_character(),
.. PDER_SS = col_character(),
.. PDER_IMSS = col_character(),
.. PDER_ISTE = col_character(),
.. PDER_ISTEE = col_character(),
.. PAFIL_PDOM = col_character(),
.. PDER_SEGP = col_character(),
.. PDER_IMSSB = col_character(),
.. PAFIL_IPRIV = col_character(),
.. PAFIL_OTRAI = col_character(),
.. P12YM_SOLT = col_character(),
.. P12YM_CASA = col_character(),
.. P12YM_SEPA = col_character(),
.. PCATOLICA = col_character(),
.. PRO_CRIEVA = col_character(),
.. POTRAS_REL = col_character(),
.. PSIN_RELIG = col_character(),
.. TOTHOG = col_character(),
.. HOGJEF_F = col_character(),
.. HOGJEF_M = col_character(),
.. POBHOG = col_character(),
.. PHOGJEF_F = col_character(),
.. PHOGJEF_M = col_character(),
.. VIVTOT = col_double(),
.. TVIVHAB = col_double(),
.. TVIVPAR = col_character(),
.. VIVPAR_HAB = col_character(),
.. VIVPARH_CV = col_character(),
.. TVIVPARHAB = col_character(),
.. VIVPAR_DES = col_character(),
.. VIVPAR_UT = col_character(),
.. OCUPVIVPAR = col_character(),
.. PROM_OCUP = col_character(),
.. PRO_OCUP_C = col_character(),
.. VPH_PISODT = col_character(),
.. VPH_PISOTI = col_character(),
.. VPH_1DOR = col_character(),
.. VPH_2YMASD = col_character(),
.. VPH_1CUART = col_character(),
.. VPH_2CUART = col_character(),
.. VPH_3YMASC = col_character(),
.. VPH_C_ELEC = col_character(),
.. VPH_S_ELEC = col_character(),
.. VPH_AGUADV = col_character(),
.. VPH_AEASP = col_character(),
.. VPH_AGUAFV = col_character(),
.. VPH_TINACO = col_character(),
.. VPH_CISTER = col_character(),
.. VPH_EXCSA = col_character(),
.. VPH_LETR = col_character(),
.. VPH_DRENAJ = col_character(),
.. VPH_NODREN = col_character(),
.. VPH_C_SERV = col_character(),
.. VPH_NDEAED = col_character(),
.. VPH_DSADMA = col_character(),
.. VPH_NDACMM = col_character(),
.. VPH_SNBIEN = col_character(),
.. VPH_REFRI = col_character(),
.. VPH_LAVAD = col_character(),
.. VPH_HMICRO = col_character(),
.. VPH_AUTOM = col_character(),
.. VPH_MOTO = col_character(),
.. VPH_BICI = col_character(),
.. VPH_RADIO = col_character(),
.. VPH_TV = col_character(),
.. VPH_PC = col_character(),
.. VPH_TELEF = col_character(),
.. VPH_CEL = col_character(),
.. VPH_INTER = col_character(),
.. VPH_STVP = col_character(),
.. VPH_SPMVPI = col_character(),
.. VPH_CVJ = col_character(),
.. VPH_SINRTV = col_character(),
.. VPH_SINLTC = col_character(),
.. VPH_SINCINT = col_character(),
.. VPH_SINTIC = col_character(),
.. TAMLOC = col_character()
.. )
- attr(*, "problems")=<externalptr>
Exporting as parquet
# Write the wrangled columns to disk as a Parquet dataset partitioned by
# NOM_ENT and ENTIDAD. `existing_data_behavior = "overwrite"` allows this
# chunk to be re-run against an output directory that already has data.
output_dir <- here("data", "processed", "parquet_data")
table <- arrow::Table$create(selected_df)

arrow::write_dataset(
  dataset = table,
  path = output_dir,
  partitioning = c("NOM_ENT", "ENTIDAD"),
  existing_data_behavior = "overwrite"
)
Reading parquet
# Read the whole Parquet dataset back into memory (no filter) and show it.
ds <- here("data", "processed", "parquet_data") |>
  open_dataset() |>
  collect()

ds
Puebla
# Collect only the rows whose NOM_ENT equals "Puebla", then show them.
ds_puebla <- here("data", "processed", "parquet_data") |>
  open_dataset() |>
  filter(NOM_ENT == "Puebla") |>
  collect()

ds_puebla
Yucatán
# Collect only the rows whose NOM_ENT equals "Yucatán", then show them.
ds_yucatan <- here("data", "processed", "parquet_data") |>
  open_dataset() |>
  filter(NOM_ENT == "Yucatán") |>
  collect()

ds_yucatan
Nuevo León
# Collect only the rows whose NOM_ENT equals "Nuevo León", then show them.
# This chunk previously repeated the Puebla query (and reassigned
# `ds_puebla`), so the Nuevo León section never displayed Nuevo León data.
ds_nuevo_leon <- here("data", "processed", "parquet_data") |>
  open_dataset() |>
  filter(NOM_ENT == "Nuevo León") |>
  collect()

ds_nuevo_leon
Verify datasets are not empty
# For every entity name in `entities_csv`, report whether the Parquet
# dataset contains rows for it: prints "OK <name> <row count>" or
# "Dataset is empty <name>".
#
# The dataset is opened once before the loop; only the filter changes per
# entity, so there is no need to re-open it on every iteration.
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))

for (value in entities_csv) {
  read_dfs <- parquet_ds |>
    filter(NOM_ENT == value) |>
    collect()

  if (nrow(read_dfs) == 0) {
    print(paste("Dataset is empty", value))
  } else {
    print(paste("OK", value, nrow(read_dfs)))
  }
}
[1] "OK Total nacional 3"
[1] "OK Aguascalientes 2058"
[1] "OK Baja California 5566"
[1] "OK Baja California Sur 2561"
[1] "OK Campeche 2800"
[1] "OK Coahuila de Zaragoza 4149"
[1] "OK Colima 1259"
[1] "OK Chiapas 21487"
[1] "OK Chihuahua 12389"
[1] "OK Ciudad de México 666"
[1] "OK Durango 6006"
[1] "OK Guanajuato 8945"
[1] "OK Guerrero 7001"
[1] "OK Hidalgo 4916"
[1] "OK Jalisco 10715"
[1] "OK México 5136"
[1] "OK Michoacán de Ocampo 8956"
[1] "OK Morelos 1678"
[1] "OK Nayarit 2913"
[1] "OK Nuevo León 4974"
[1] "OK Oaxaca 11856"
[1] "OK Puebla 7059"
[1] "OK Querétaro 2249"
[1] "OK Quintana Roo 2243"
[1] "OK San Luis Potosí 6729"
[1] "OK Sinaloa 5552"
[1] "OK Sonora 7500"
[1] "OK Tabasco 2517"
[1] "OK Tamaulipas 6695"
[1] "OK Tlaxcala 1323"
[1] "OK Veracruz de Ignacio de la Llave 20401"
[1] "OK Yucatán 2691"
[1] "OK Zacatecas 4669"
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCmF1dGhvcjogTWFyY28gUG9sbyBCcmF2byBNb250aWVsDQpkYXRlOiAyMDIwLTA0LTIxDQotLS0NCg0KIyMjIExpYnJhcmllcw0KDQpgYGB7cn0NCmlmICghcmVxdWlyZSgicmVudiIpKSBpbnN0YWxsLnBhY2thZ2VzKCJyZW52IikNCmxpYnJhcnkocmVudikNCnJlbnY6OnJlc3RvcmUoKQ0KbGlicmFyeShoZXJlKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkocmVhZHIpDQpsaWJyYXJ5KGFycm93KQ0KYGBgDQoNCiMjIyBSZWFkIGRhdGENCg0KYGBge3J9DQp6aXBfZmlsZSA8LSBoZXJlKCJkYXRhIiwgInJhdyIsICJpdGVyXzAwX2NwdjIwMjBfY3N2LnppcCIpDQpgYGANCg0KYGBge3J9DQp0ZW1wX2RpciA8LSBoZXJlKCJ0ZW1wIikNCmRpci5jcmVhdGUodGVtcF9kaXIsIHNob3dXYXJuaW5ncyA9IEZBTFNFKQ0KDQp1bnppcCh6aXBfZmlsZSwgZmlsZXMgPSBjKCJpdGVyXzAwX2NwdjIwMjAvY29uanVudG9fZGVfZGF0b3MvY29uanVudG9fZGVfZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIsICJpdGVyXzAwX2NwdjIwMjAvZGljY2lvbmFyaW9fZGF0b3MvZGljY2lvbmFyaW9fZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIpLCBleGRpciA9IHRlbXBfZGlyKQ0KYGBgDQoNCmBgYHtyfQ0KDQpkYXRhX3BhdGggPC0gaGVyZSh0ZW1wX2RpciwNCiAgICAgICAgICAgICAgICAgIml0ZXJfMDBfY3B2MjAyMCIsDQogICAgICAgICAgICAgICAgICJjb25qdW50b19kZV9kYXRvcyIsDQogICAgICAgICAgICAgICAgICJjb25qdW50b19kZV9kYXRvc19pdGVyXzAwQ1NWMjAuY3N2IikNCg0KZGljdF9wYXRoIDwtIGhlcmUodGVtcF9kaXIsDQogICAgICAgICAgICAgICAgICJpdGVyXzAwX2NwdjIwMjAiLA0KICAgICAgICAgICAgICAgICAiZGljY2lvbmFyaW9fZGF0b3MiLA0KICAgICAgICAgICAgICAgICAiZGljY2lvbmFyaW9fZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIpDQoNCmluZm9fZGljdCA8LSByZWFkX2NzdihkaWN0X3BhdGgpDQpkZiA8LSByZWFkX2NzdihkYXRhX3BhdGgpDQoNCg0KdW5saW5rKHRlbXBfZGlyLCByZWN1cnNpdmUgPSBUUlVFKQ0KYGBgDQoNCmBgYHtyfQ0KIyBFeHBvcnRpbmcgZGljdGlvbmFyeSBmaWxlDQp3cml0ZV9jc3YoaW5mb19kaWN0LA0KICAgICAgICAgIGhlcmUoImRhdGEiLCAicmF3IiwgImRpY2Npb25hcmlvX2RhdG9zX2l0ZXJfMDBDU1YyMC5jc3YiKSkNCg0KYGBgDQoNCiMjIyBFeHBsb3JhdGlvbg0KDQpgYGB7cn0NCmhlYWQoZGYpDQpoZWFkKGluZm9fZGljdCkNCmBgYA0KDQpgYGB7cn0NCnN0cihkZikNCmBgYA0KDQpgYGB7cn0NCmNsZWFuX2luZm9fZGljdCA8LSBpbmZvX2RpY3RbLWMoMTozKSwgXQ0KbmFtZXMoY2xlYW5faW5mb19kaWN0KSA8LSBjbGVhbl9pbmZvX2RpY3RbMSwgXQ0KY2xlYW5faW5mb19kaWN0IDwtIGNsZWFuX2luZm9fZGljdFstMSxdDQpjbGVhbl9pbmZvX2RpY3QgPC0gY2xlYW5faW5mb19kaWN0WywgLWMoNzox
MCldDQoNCg0KY2xlYW5faW5mb19kaWN0DQpgYGANCg0KYGBge3J9DQp1bmlxdWVfc3RhdGVzIDwtIGRmIHw+IA0KICAgICAgZGlzdGluY3QoTk9NX0VOVCkNCg0Kd3JpdGVfY3N2KHVuaXF1ZV9zdGF0ZXMsIGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgImVudGl0eV9uYW1lcy5jc3YiKSkNCg0KDQp1bmlxdWVfc3RhdGVzDQpgYGANCg0KYGBge3J9DQplbnRpdGllc19jc3YgPC0gcmVhZF9jc3YoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAiZW50aXR5X25hbWVzLmNzdiIpKSB8PiBwdWxsKCkNCg0KcHJpbnQoZW50aXRpZXNfY3N2KQ0KYGBgDQoNCiMjIyBTZWxlY3Rpbmcgcm93cyB0aGF0IHdlJ2xsIGFuYWx5emUNCg0KYGBge3J9DQpyb3dzX3RvX2luY2x1ZGUgPC0gYygxOjEyLCA1MzoxMzIsIDEzNjoxNDAsIDE0NywgMTU1OjIxMSwgMjIwOjIzMikNCg0KZmlsdGVyZWRfZGF0YSA8LSBjbGVhbl9pbmZvX2RpY3QgfD4gDQogICAgICBmaWx0ZXIocm93X251bWJlcigpICVpbiUgcm93c190b19pbmNsdWRlKSB8PiANCiAgICAgIHB1bGwoNCkNCg0KZmlsdGVyZWRfZGF0YQ0KYGBgDQoNCmBgYHtyfQ0Kc2VsZWN0ZWRfZGYgPC0gZGYgfD4gDQogICAgICBzZWxlY3QoZmlsdGVyZWRfZGF0YSkNCg0Kc2VsZWN0ZWRfZGYNCmBgYA0KDQojIyMgRURBIGJlZm9yZSBleHBvcnRpbmcNCg0KYGBge3J9DQpzdHIoZGYpDQpgYGANCg0KIyMjIEV4cG9ydGluZyBhcyBwYXJxdWV0DQoNCmBgYHtyfQ0KIyBFeHBvcnQgd3JhbmdsZWQgZGF0YSBhcyBwYXJxdWV0IGZpbGUNCnRhYmxlIDwtIGFycm93OjpUYWJsZSRjcmVhdGUoc2VsZWN0ZWRfZGYpDQoNCm91dHB1dF9kaXIgPC0gaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikNCg0KYXJyb3c6OndyaXRlX2RhdGFzZXQodGFibGUsIG91dHB1dF9kaXIsIHBhcnRpdGlvbmluZyA9IGMoIk5PTV9FTlQiLCAiRU5USURBRCIpLCBleGlzdGluZ19kYXRhX2JlaGF2aW9yID0gIm92ZXJ3cml0ZSIpDQpgYGANCg0KIyMjIFJlYWRpbmcgcGFycXVldA0KDQpgYGB7cn0NCmRzIDwtIG9wZW5fZGF0YXNldChoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJwYXJxdWV0X2RhdGEiKSkgfD4gDQogICAgICAgIGNvbGxlY3QoKQ0KDQpkcw0KYGBgDQoNCiMjIyBQdWVibGENCg0KYGBge3J9DQpkc19wdWVibGEgPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT0iUHVlYmxhIikgfD4gDQogICAgY29sbGVjdCgpDQoNCmRzX3B1ZWJsYQ0KYGBgDQoNCiMjIyBZdWNhdMOhbg0KDQpgYGB7cn0NCmRzX3l1Y2F0YW4gPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT0iWXVjYXTDoW4iKSB8PiANCiAgICBjb2xsZWN0KCkNCg0KZHNfeXVjYXRhbg0KYGBgDQoNCiMjIyBOdWV2byBMZcOzbg0KDQpgYGB7cn0NCmRzX251
ZXZvX2xlb24gPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT0iTnVldm8gTGXDs24iKSB8PiANCiAgICBjb2xsZWN0KCkNCg0KZHNfbnVldm9fbGVvbg0KYGBgDQoNCiMjIyBUb3RhbCBOYWNpb25hbA0KDQpgYGB7cn0NCmRzX25hY2lvbmFsIDwtIG9wZW5fZGF0YXNldChoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJwYXJxdWV0X2RhdGEiKSkgfD4NCiAgICBmaWx0ZXIoTk9NX0VOVD09IlRvdGFsIG5hY2lvbmFsIikgfD4gDQogICAgY29sbGVjdCgpDQoNCmRzX25hY2lvbmFsDQpgYGANCg0KIyMjIFZlcmlmeSBkYXRhc2V0cyBhcmUgbm90IGVtcHR5DQoNCmBgYHtyfQ0KDQpmb3IodmFsdWUgaW4gZW50aXRpZXNfY3N2KSB7DQogIA0KICByZWFkX2RmcyA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PXZhbHVlKSB8Pg0KICAgIGNvbGxlY3QoKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSAwKSB7DQogICAgcHJpbnQocGFzdGUoIkRhdGFzZXQgaXMgZW1wdHkiLCB2YWx1ZSkpDQogIH0gZWxzZSB7DQogICAgICAgIHByaW50KHBhc3RlKCJPSyIsIHZhbHVlLCBucm93KHJlYWRfZGZzKSkpDQoNCiAgfQ0KDQp9DQpgYGANCg==